{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 第四周作业根据活动关键词聚类"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 导入必要工具包"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#保存数据\n",
    "#import cPickle  python3名字变为 pickle 了\n",
    "import pickle\n",
    "\n",
    "import itertools\n",
    "import csv\n",
    "\n",
    "#处理事件字符串\n",
    "import datetime\n",
    "\n",
    "import numpy as np\n",
    "import pandas  as pd\n",
    "import scipy.io as sio\n",
    "import scipy.sparse as ss\n",
    "from sklearn import metrics\n",
    "from sklearn.cluster import MiniBatchKMeans\n",
    "\n",
    "#相似度/距离\n",
    "import scipy.spatial.distance as ssd\n",
    "\n",
    "from collections import defaultdict\n",
    "from sklearn.preprocessing import normalize\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 第一步：抽取出只在训练集和测试集中出现的event，存为event_clusting.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of uniqueUsers :3391\n",
      "number of uniqueEvents :13418\n"
     ]
    }
   ],
   "source": [
    "# 统计训练集中有多少不同的用户的events\n",
    "# 统计训练集中有多少不同的用户的events\n",
    "#set（）存 无序 非重复 元素，从train/test读取 user_id和event_id 加入set()自动剔除重复数据\n",
    "uniqueUsers = set()\n",
    "uniqueEvents = set()\n",
    "\n",
    "#循环读取train.csv文件和test.csv文件中的user_id和event_id 字段加入uniqueUsers和uniqueEvents\n",
    "for filename in [\"train.csv\", \"test.csv\"]:\n",
    "    #打开文件'r'方式打开为str,以‘rb'打开为 byte，读取时需要加decode()\n",
    "    f = open(filename, 'r')\n",
    "    \n",
    "    #忽略第一行（列名字）\n",
    "    f.readline().strip().split(\",\")\n",
    "    \n",
    "    for line in f:    #对每条记录\n",
    "        cols = line.strip().split(',')  #把line按照，分隔符读入cols\n",
    "        uniqueUsers.add(cols[0])   #第一列为用户ID\n",
    "        uniqueEvents.add(cols[1])   #第二列为活动ID\n",
    "        \n",
    "    f.close()\n",
    "\n",
    "#把len()返回元素个数\n",
    "n_uniqueUsers = len(uniqueUsers)\n",
    "n_uniqueEvents = len(uniqueEvents)\n",
    "\n",
    "print(\"number of uniqueUsers :%d\" % n_uniqueUsers)\n",
    "print(\"number of uniqueEvents :%d\" % n_uniqueEvents)\n",
    "\n",
    "#dok_matrix为稀疏矩阵\n",
    "#userEventScores = ss.dok_matrix((n_uniqueUsers, n_uniqueEvents))\n",
    "userIndex = dict()\n",
    "eventIndex = dict()\n",
    "\n",
    "#重新编码用户索引字典\n",
    "for i, u in enumerate(uniqueUsers):\n",
    "    userIndex[u] = i\n",
    "    \n",
    "#重新编码活动索引字典    \n",
    "for i, e in enumerate(uniqueEvents):\n",
    "    eventIndex[e] = i\n",
    "    \n",
    "n_records = 0\n",
    "ftrain = open(\"events.csv\", 'r')\n",
    "reader = csv.reader(ftrain)\n",
    "#dictReader读出csv字段名，写入 event_clusting.csv用\n",
    "dic_read = csv.DictReader(ftrain)\n",
    "#print(dic_read.fieldnames)\n",
    "event_new = open(\"event_clusting.csv\",'w',newline='')\n",
    "#open写文件加入 newline=''可以避免写csv时出现空行\n",
    "writer = csv.writer(event_new)\n",
    "writer.writerow(dic_read.fieldnames)\n",
    "for line in reader:\n",
    "    #判断event.csv中event_id是否在，eventIndex.keys()中，存在说明event_id在train.csv和test.csv中都存在    \n",
    "    if line[0] in eventIndex.keys():\n",
    "        writer.writerow(line)\n",
    "ftrain.close()\n",
    "event_new.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 第二步：对event.csv分离出来的数据进行聚类"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "event_id          0\n",
       "user_id           0\n",
       "start_time        0\n",
       "city           7092\n",
       "state          8868\n",
       "zip           12004\n",
       "country        7065\n",
       "lat            5356\n",
       "lng            5356\n",
       "c_1               0\n",
       "c_2               0\n",
       "c_3               0\n",
       "c_4               0\n",
       "c_5               0\n",
       "c_6               0\n",
       "c_7               0\n",
       "c_8               0\n",
       "c_9               0\n",
       "c_10              0\n",
       "c_11              0\n",
       "c_12              0\n",
       "c_13              0\n",
       "c_14              0\n",
       "c_15              0\n",
       "c_16              0\n",
       "c_17              0\n",
       "c_18              0\n",
       "c_19              0\n",
       "c_20              0\n",
       "c_21              0\n",
       "              ...  \n",
       "c_72              0\n",
       "c_73              0\n",
       "c_74              0\n",
       "c_75              0\n",
       "c_76              0\n",
       "c_77              0\n",
       "c_78              0\n",
       "c_79              0\n",
       "c_80              0\n",
       "c_81              0\n",
       "c_82              0\n",
       "c_83              0\n",
       "c_84              0\n",
       "c_85              0\n",
       "c_86              0\n",
       "c_87              0\n",
       "c_88              0\n",
       "c_89              0\n",
       "c_90              0\n",
       "c_91              0\n",
       "c_92              0\n",
       "c_93              0\n",
       "c_94              0\n",
       "c_95              0\n",
       "c_96              0\n",
       "c_97              0\n",
       "c_98              0\n",
       "c_99              0\n",
       "c_100             0\n",
       "c_other           0\n",
       "Length: 110, dtype: int64"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#读取数据\n",
    "train = pd.read_csv('C:\\\\Users\\\\dell\\\\Desktop\\\\ai\\\\four-week\\\\homework\\\\event_clusting.csv')\n",
    "\n",
    "#train.head()\n",
    "#train.info()\n",
    "#查看字段空值情况\n",
    "train.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>c_1</th>\n",
       "      <th>c_2</th>\n",
       "      <th>c_3</th>\n",
       "      <th>c_4</th>\n",
       "      <th>c_5</th>\n",
       "      <th>c_6</th>\n",
       "      <th>c_7</th>\n",
       "      <th>c_8</th>\n",
       "      <th>c_9</th>\n",
       "      <th>c_10</th>\n",
       "      <th>...</th>\n",
       "      <th>c_92</th>\n",
       "      <th>c_93</th>\n",
       "      <th>c_94</th>\n",
       "      <th>c_95</th>\n",
       "      <th>c_96</th>\n",
       "      <th>c_97</th>\n",
       "      <th>c_98</th>\n",
       "      <th>c_99</th>\n",
       "      <th>c_100</th>\n",
       "      <th>c_other</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 101 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   c_1  c_2  c_3  c_4  c_5  c_6  c_7  c_8  c_9  c_10   ...     c_92  c_93  \\\n",
       "0    2    0    2    0    0    0    0    0    0     0   ...        0     1   \n",
       "1    2    0    2    0    0    0    0    0    0     0   ...        0     0   \n",
       "2    0    0    0    0    0    0    0    0    0     0   ...        0     0   \n",
       "3    1    0    2    1    0    0    0    0    0     0   ...        0     0   \n",
       "4    1    1    0    0    0    0    0    2    0     0   ...        0     0   \n",
       "\n",
       "   c_94  c_95  c_96  c_97  c_98  c_99  c_100  c_other  \n",
       "0     0     0     0     0     0     0      0        9  \n",
       "1     0     0     0     0     0     0      0        7  \n",
       "2     0     0     0     0     0     0      0       12  \n",
       "3     0     0     0     0     0     0      0        8  \n",
       "4     0     0     0     0     0     0      0        9  \n",
       "\n",
       "[5 rows x 101 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#提取train中活动的关键词\n",
    "train=train.iloc[:,9:]\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(13418, 80)\n",
      "[9.58993312e-01 3.31809636e-02 3.09685800e-03 1.66963166e-03\n",
      " 2.52965322e-04 1.94139376e-04 1.67114687e-04 1.26020645e-04\n",
      " 1.09248450e-04 1.02445807e-04 9.61627226e-05 9.02182581e-05\n",
      " 8.46269538e-05 7.50866451e-05 6.63218833e-05 6.52577303e-05\n",
      " 6.17647850e-05 5.62190684e-05 5.28798538e-05 5.06268401e-05\n",
      " 4.89913304e-05 4.73003439e-05 4.44297410e-05 4.24812048e-05\n",
      " 4.12134983e-05 3.92686767e-05 3.75212266e-05 3.63368338e-05\n",
      " 3.50207695e-05 3.26585871e-05 3.14393121e-05 3.09234176e-05\n",
      " 2.92960260e-05 2.85184122e-05 2.81286850e-05 2.75186573e-05\n",
      " 2.69479028e-05 2.60787628e-05 2.52976331e-05 2.41340903e-05\n",
      " 2.26598225e-05 2.19403485e-05 2.16896852e-05 2.11730317e-05\n",
      " 2.09563191e-05 2.00859704e-05 1.89729273e-05 1.87076147e-05\n",
      " 1.78814628e-05 1.74151395e-05 1.71435783e-05 1.65334836e-05\n",
      " 1.63427619e-05 1.60419319e-05 1.58226260e-05 1.55976801e-05\n",
      " 1.52468643e-05 1.48166400e-05 1.43572705e-05 1.43018036e-05\n",
      " 1.41806044e-05 1.37519276e-05 1.35086500e-05 1.34099440e-05\n",
      " 1.29960222e-05 1.27017539e-05 1.24096776e-05 1.20740299e-05\n",
      " 1.16845278e-05 1.13019685e-05 1.10376722e-05 1.08007722e-05\n",
      " 1.06909675e-05 1.00904333e-05 1.00419274e-05 9.82254239e-06\n",
      " 9.32993567e-06 9.11648568e-06 8.97798228e-06 8.54208371e-06]\n",
      "[1.31064884e+04 4.53481696e+02 4.23245219e+01 2.28187284e+01\n",
      " 3.45725774e+00 2.65328803e+00 2.28394367e+00 1.72231453e+00\n",
      " 1.49309022e+00 1.40011902e+00 1.31424859e+00 1.23300604e+00\n",
      " 1.15659012e+00 1.02620345e+00 9.06416115e-01 8.91872418e-01\n",
      " 8.44134601e-01 7.68341715e-01 7.22704924e-01 6.91913158e-01\n",
      " 6.69560773e-01 6.46450188e-01 6.07217878e-01 5.80587383e-01\n",
      " 5.63261734e-01 5.36682006e-01 5.12799740e-01 4.96612733e-01\n",
      " 4.78626183e-01 4.46342417e-01 4.29678678e-01 4.22627987e-01\n",
      " 4.00386550e-01 3.89758961e-01 3.84432590e-01 3.76095388e-01\n",
      " 3.68294930e-01 3.56416460e-01 3.45740820e-01 3.29838770e-01\n",
      " 3.09690064e-01 2.99857068e-01 2.96431272e-01 2.89370208e-01\n",
      " 2.86408413e-01 2.74513423e-01 2.59301549e-01 2.55675542e-01\n",
      " 2.44384587e-01 2.38011381e-01 2.34299976e-01 2.25961857e-01\n",
      " 2.23355280e-01 2.19243858e-01 2.16246621e-01 2.13172303e-01\n",
      " 2.08377730e-01 2.02497887e-01 1.96219719e-01 1.95461658e-01\n",
      " 1.93805237e-01 1.87946544e-01 1.84621688e-01 1.83272680e-01\n",
      " 1.77615643e-01 1.73593901e-01 1.69602116e-01 1.65014844e-01\n",
      " 1.59691549e-01 1.54463141e-01 1.50851023e-01 1.47613329e-01\n",
      " 1.46112637e-01 1.37905181e-01 1.37242256e-01 1.34243937e-01\n",
      " 1.27511519e-01 1.24594314e-01 1.22701398e-01 1.16744006e-01]\n"
     ]
    }
   ],
   "source": [
    "#做下PCA降维\n",
    "from sklearn.decomposition import PCA\n",
    "#对数据进行PCA降维，PCA参数比较少，说明如下：\n",
    "#1、n_components指定降维后的维数，此时设置应大于等于的整数\n",
    "#当设置为[0-1]之间的数时，为主成分的方差和占原始维度所有特征方差和的比例阈值\n",
    "#2、whiten ：判断是否进行白化。所谓白化，就是对降维后的数据的每个特征进行归一化，\n",
    "#让方差都为1.对于PCA降维本身来说，一般不需要白化。\n",
    "#如果你PCA降维后有后续的数据处理动作，可以考虑白化。默认值是False，即不进行白化。\n",
    "#3、svd_solver：即指定奇异值分解SVD的方法，由于特征分解是奇异值分解SVD的一个特例，\n",
    "#一般的PCA库都是基于SVD实现的。\n",
    "#有4个可以选择的值：{‘auto’, ‘full’, ‘arpack’, ‘randomized’}。\n",
    "#randomized一般适用于数据量大，数据维度多同时主成分数目比例又较低的PCA降维，\n",
    "#它使用了一些加快SVD的随机算法。 full则是传统意义上的SVD，使用了scipy库对应的实现。\n",
    "#arpack和randomized的适用场景类似，区别是randomized使用的是scikit-learn自己的SVD实现，\n",
    "#而arpack直接使用了scipy库的sparse SVD实现。\n",
    "#默认是auto，即PCA类会自己去在前面讲到的三种算法里面去权衡，\n",
    "#选择一个合适的SVD算法来降维。一般来说，使用默认值就够了\n",
    "pca = PCA(n_components=80)\n",
    "pca.fit(train)\n",
    "\n",
    "train_pca = pca.transform(train)\n",
    "\n",
    "# 降维后的特征维数\n",
    "print(train_pca.shape)\n",
    "#打印降维后各主成分方差值占总方差值得比例\n",
    "print(pca.explained_variance_ratio_)\n",
    "#打印降维后各主成分的方差值\n",
    "print(pca.explained_variance_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 因为没有标签信息，kmeans只能使用内部评价法做评测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "#定义一个Minibatchkmeans方法，供循环验证K值使用\n",
    "#当样本数大于1万时，首先考虑Minibatchkmeans,Minibatchkmeans相比kmeans \n",
    "#尽量保持准确性的情况下，减小运算时间\n",
    "\n",
    "#导入工具包\n",
    "from sklearn import metrics\n",
    "from sklearn.cluster import MiniBatchKMeans\n",
    "import time\n",
    "def K_cluster_analysis(K, train_pca):\n",
    "    start = time.time()\n",
    "    \n",
    "    print(\"K-means begin with clusters: {}\".format(K));\n",
    "    \n",
    "    #K-means,在训练集上训练\n",
    "    # MiniBatchKMeans(n_clusters=8, init=’k-means++’, max_iter=100, \n",
    "    #                batch_size=100, verbose=0, compute_labels=True\n",
    "    #                , random_state=None, tol=0.0, max_no_improvement=10\n",
    "    #                , init_size=None, n_init=3, reassignment_ratio=0.01)\n",
    "    mb_kmeans = MiniBatchKMeans(n_clusters = K)\n",
    "    mb_kmeans.fit(train_pca)\n",
    "    \n",
    "    CH_score = metrics.silhouette_score(train_pca,mb_kmeans.predict(train_pca))\n",
    "       \n",
    "    end = time.time()\n",
    "    print(\"CH_score: {}, time elaps:{}\".format(CH_score, int(end-start)))\n",
    "    \n",
    "    return CH_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "K-means begin with clusters: 10\n",
      "CH_score: 0.4378397059453619, time elaps:7\n",
      "K-means begin with clusters: 20\n",
      "CH_score: 0.2224924328640753, time elaps:6\n",
      "K-means begin with clusters: 30\n",
      "CH_score: 0.2312363601721984, time elaps:6\n",
      "K-means begin with clusters: 40\n",
      "CH_score: 0.1632322486963238, time elaps:6\n",
      "K-means begin with clusters: 50\n",
      "CH_score: 0.1520027030477404, time elaps:7\n",
      "K-means begin with clusters: 60\n",
      "CH_score: 0.12566812240593853, time elaps:7\n",
      "K-means begin with clusters: 70\n",
      "CH_score: 0.12181694217192006, time elaps:6\n",
      "K-means begin with clusters: 80\n",
      "CH_score: 0.10863064136103062, time elaps:7\n",
      "K-means begin with clusters: 90\n",
      "CH_score: 0.09930761686223903, time elaps:7\n",
      "K-means begin with clusters: 100\n",
      "CH_score: 0.06998103480704315, time elaps:7\n"
     ]
    }
   ],
   "source": [
    "# 设置超参数（聚类数目K）搜索范围\n",
    "Ks = [10, 20, 30,40,50,60,70,80,90,100]\n",
    "CH_scores = []\n",
    "for K in Ks:\n",
    "    ch = K_cluster_analysis(K, train_pca)\n",
    "    CH_scores.append(ch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "K-means begin with clusters: 8\n",
      "CH_score: 0.45649668624437606, time elaps:7\n",
      "K-means begin with clusters: 9\n",
      "CH_score: 0.45902790124121423, time elaps:6\n",
      "K-means begin with clusters: 10\n",
      "CH_score: 0.414910104119804, time elaps:6\n",
      "K-means begin with clusters: 11\n",
      "CH_score: 0.4148814052346759, time elaps:6\n",
      "K-means begin with clusters: 12\n",
      "CH_score: 0.38261743806987203, time elaps:6\n"
     ]
    }
   ],
   "source": [
    "# 上面结果显示 k=10时，CH_SCORE最大，尝试调小K值看下结果\n",
    "Ks = [8,9,10,11,12]\n",
    "CH_scores = []\n",
    "for K in Ks:\n",
    "    ch = K_cluster_analysis(K, train_pca)\n",
    "    CH_scores.append(ch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<matplotlib.lines.Line2D at 0x1c7867ac2b0>]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD8CAYAAACb4nSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAHl1JREFUeJzt3XuYHHWd7/H3JwkhBoJcMgHNhUTIqiEiYZuIy4pB1xBuicplEzwLeIuIMYBcxBV4Hsmqa5YVRFFO9ODiHiEbUWQMgaAIHPFIYCLXIZvDEBEGECYBoxghBL7nj6oJnUnPTE2mu6svn9fz9JOp6l9Nf6cYPlP97apfKSIwM7PmMCTvAszMrHoc+mZmTcShb2bWRBz6ZmZNxKFvZtZEHPpmZk3EoW9m1kQc+mZmTcShb2bWRIblXUBPo0ePjokTJ+ZdhplZXVm9evX6iGjpb1zNhf7EiRNpa2vLuwwzs7oi6fdZxrm9Y2bWRBz6ZmZNJFPoS5olaa2kDkkX9DHuBEkhqVC07kBJv5HULukhSSPKUbiZmQ1cvz19SUOBK4EPAJ3AvZJaI+KRHuNGAQuBVUXrhgH/G/iniHhA0l7AK2Ws38zMBiDLkf50oCMi1kXEZmApMKfEuEXAYuClonUzgQcj4gGAiNgQEa8OsmYzM9tBWUJ/LPBk0XJnum4rSdOA8RGxvMe2fwOEpJWSfivp/EFVa2Zmg5LllE2VWLf1dluShgCXAaf18v3/HjgE2ATcJml1RNy2zQtI84H5ABMmTMhUuJmZDVyW0O8ExhctjwOeLloeBUwF7pAEsA/QKml2uu2dEbEeQNIK4GBgm9CPiCXAEoBCoeD7N5ZZBLzwAjz33LaPv/4VPvpR2HPPvCs0s2rJEvr3ApMlTQKeAuYCJ3c/GREbgdHdy5LuAM6NiDZJjwHnSxoJbAbeS/KuwAZp0ybo6oJnn90+zHs+urpgy5bS3+eXv4Tly0Gl3s+ZWcPpN/QjYoukBcBKYChwdUS0S7oEaIuI1j62fUHS10n+cASwIiJuKlPtDWXLFtiwof8A7368+GLp77PrrjBmTPLYd1845JDXl3s+rrsOzj4bfvADOPXU6v68ZpYPRdRWN6VQKEQjTMMQAX/6U/YQ37Ah2aanYcOgpaX34O75GDkye42vvQYzZsCDD0J7O4wd2+8mZlaj0s9LC/2Nq7m5d2rZyy8nrZKegd1bi2Xz5tLfZ489Xg/pt78d3vve3kN8991hSIWumx4yBK6+Gg48EObPd5vHrBk0dei/9ho8/3z2o/GNG0t/nxEjYO+9k5B+05vgne/sPcRHj4bhw6v7c/Zl//3hX/8VzjzTbR6zZtBw7Z0XX8we4uvXw6slLhUbMiQJ56wtlV13re8jZLd5zOpf07V37rkHjjgiOaullN12ez2k99sP3v3u3kN8zz1h6NDq1p8nt3nMmkfDhP64cXDGGaVDvKUlacFY79zmMWsODdfesR3nNo9Z/cra3vF8+rZVd5tn8+akzVNjxwNmVgYOfdtGd5tnxYqkzWNmjcWhb9tZsADe856kv//UU3lXY2bl5NC37bjNY9a4HPpWkts8Zo3JoW+9cpvHrPE49K1XbvOYNR6HvvXJbR6zxuLQt365zWPWOBz61i+3ecwah0PfMnGbx6wxZAp9SbMkrZXUIemCPsadICkkFdLliZL+Kun+9HFVuQq36nObx6z+9Rv6koYCVwJHAVOAeZKmlBg3ClgIrOrx1GMRcVD6OL0MNVtO3OYxq39ZjvSnAx0RsS4iNgNLgTklxi0CFgMvlbE+qzFu85jVtyyhPxZ4smi5M123laRpwPiIWF5i+0mS7pN0p6T37HipVivc5jGrX1lCv9Q9lLa+sZc0BLgMOKfEuGeACRExDfgccK2k3bZ7AWm+pDZJbV1dXdkqt9y4zWNWv7KEficwvmh5HPB00fIoYCpwh6THgUOBVkmF
iHg5IjYARMRq4DHgb3q+QEQsiYhCRBRaWlp27CexqnKbx6w+ZQn9e4HJkiZJGg7MBVq7n4yIjRExOiImRsRE4G5gdkS0SWpJPwhG0luAycC6sv8Ulgu3eczqT7+hHxFbgAXASmANsCwi2iVdIml2P5sfDjwo6QHgeuD0iHh+sEVbbXCbx6z++B65NmhXXJEc7f/Hf/iG6mZ58T1yrWrc5jGrHw59GzS3eczqh0PfysJn85jVB4e+lY3bPGa1z6FvZeM2j1ntc+hbWbnNY1bbHPpWdm7zmNUuh76Vnds8ZrXLoW8V4TaPWW1y6FvFuM1jVnsc+lYxbvOY1R6HvlWU2zxmtcWhbxXnNo9Z7XDoW8W5zWNWOxz6VhVu85jVBoe+VY3bPGb5c+hb1bjNY5a/TKEvaZaktZI6JF3Qx7gTJIWkQo/1EyS9KOncwRZs9c1tHrN89Rv66Y3NrwSOAqYA8yRNKTFuFLAQWFXi21wG3Dy4Uq1RuM1jlp8sR/rTgY6IWBcRm4GlwJwS4xYBi4GXildK+iCwDmgfZK3WINzmMctPltAfCzxZtNyZrttK0jRgfEQs77F+F+DzwJf6egFJ8yW1SWrr6urKVLjVN7d5zPKRJfRVYt3WYzNJQ0jaN+eUGPcl4LKIeLGvF4iIJRFRiIhCS0tLhpKsEbjNY1Z9WUK/ExhftDwOeLpoeRQwFbhD0uPAoUBr+mHuu4DF6fqzgH+WtKAMdVsDcJvHrPqyhP69wGRJkyQNB+YCrd1PRsTGiBgdERMjYiJwNzA7Itoi4j1F6y8HvhIR3yr/j2H1ym0es+rqN/QjYguwAFgJrAGWRUS7pEskza50gdb43OYxqx5Fjb2nLhQK0dbWlncZVmUdHXDggXDEEbB8OajUJ0lm1itJqyOi0N84X5FrNcFtHrPqcOhbzXCbx6zyHPpWM3w2j1nlOfStphS3ea65Ju9qzBqPQ99qTneb56yz3OYxKzeHvtUct3nMKsehbzXJbR6zynDoW81ym8es/Bz6VrPc5jErP4e+1TS3eczKy6FvNc9tHrPycehbzXObx6x8HPpWF9zmMSsPh77VDbd5zAbPoW91w20es8Fz6FtdcZvHbHAc+lZ33OYx23GZQl/SLElrJXVIuqCPcSdIivSm6EiaLun+9PGApA+Vq3BrXm7zmO24fkNf0lDgSuAoYAowT9KUEuNGAQuBVUWrHwYKEXEQMAv4n5KGlaNwa25u85jtmCxH+tOBjohYFxGbgaXAnBLjFgGLgZe6V0TEpvTG6gAjAB+TWdm4zWM2cFlCfyzwZNFyZ7puK0nTgPERsbznxpLeJakdeAg4veiPQPGY+ZLaJLV1dXUN6Aew5uU2j9nAZQl9lVi39X8vSUOAy4BzSm0cEasi4gDgEOALkkaUGLMkIgoRUWhpaclWuRlu85gNVJbQ7wTGFy2PA54uWh4FTAXukPQ4cCjQ2v1hbreIWAP8JR1rVjZu85hllyX07wUmS5okaTgwF2jtfjIiNkbE6IiYGBETgbuB2RHRlm4zDEDSvsBbgcfL/UNYc3Obxyy7fkM/7cEvAFYCa4BlEdEu6RJJs/vZ/O+BByTdD9wAnBER6wdbtFlPbvOYZaOoscOiQqEQbW1teZdhdei112DGDHjwQWhvh7Fj+93ErGFIWh0Rhf7G+Ypcaxhu85j1z6FvDcVtHrO+OfSt4fhsHrPeOfSt4bjNY9Y7h741JLd5zEpz6FvDcpvHbHsOfWtYbvOYbc+hbw3NbR6zbTn0reG5zWP2Ooe+NTy3ecxe59C3puA2j1nCoW9Nw20eM4e+NRG3ecwc+tZk3OaxZufQt6bjNo81M4e+NR23eayZOfStKbnNY80qU+hLmiVpraQOSRf0Me4ESdF9U3RJH5C0WtJD6b/vK1fhZoPlNo81o35DX9JQ4ErgKGAKME/SlBLjRgELgVVFq9cDx0XEO4BTgf8sR9Fm5eA2jzWjLEf604GOiFgXEZuBpcCcEuMWAYuB
l7pXRMR9EfF0utgOjJC08yBrNisbt3ms2WQJ/bHAk0XLnem6rSRNA8ZHxPI+vs/xwH0R8XLPJyTNl9Qmqa2rqytDSWbl4zaPNZMsoa8S67a+EZY0BLgMOKfXbyAdAHwN+FSp5yNiSUQUIqLQ0tKSoSSz8nGbx5pJltDvBMYXLY8Dni5aHgVMBe6Q9DhwKNBa9GHuOOAG4JSIeKwcRZuVm9s81iyyhP69wGRJkyQNB+YCrd1PRsTGiBgdERMjYiJwNzA7Itok7Q7cBHwhIn5dgfrNysZtHmsG/YZ+RGwBFgArgTXAsohol3SJpNn9bL4A2B+4SNL96WPMoKs2qwC3eawZKGrsN7tQKERbW1veZVgTu+IKOPNM+P734bTT8q7GLBtJqyOi0N84X5Fr1oPbPNbIHPpmPbjNY43MoW9Wgs/msUbl0Dfrhds81ogc+ma9cJvHGpFD36wPbvNYoxmWdwFmtW7BArj++qTN8+qrMHTots9L1Vuu5msNdnnIEDj8cNhtN6yGOPTN+tHd5jnkEPjEJ/Kupr4ceCDceSfsvnvelVg3h75ZBvvvD088ARs2vL6uZ4/fy9sur10Lp54Ks2fDypXwhjdgNcChb5bRqFHJw7I5+ODkXdK8ecnj+uthmBMnd/4g18wq5h//MZnW4sYb4fTTfQZULfDfXTOrqAUL4LnnYNEiGDMGvvKVvCtqbg59M6u4L30Jnn0WvvrVJPjPOivvipqXQ9/MKk6Cb38b1q+Hs8+Glhb4yEfyrqo5uadvZlUxdCj88IcwY0YyZfUtt+RdUXNy6JtZ1YwYkXyoO3UqHH88rFqVd0XNJ1PoS5olaa2kDkkX9DHuBElRdH/cvSTdLulFSd8qV9FmVr922w1uvhn22QeOPhrWrMm7oubSb+hLGgpcCRwFTAHmSZpSYtwoYCFQ/Lf7JeAi4NyyVGtmDWGffeDWW2GnneDII+HJJ/OuqHlkOdKfDnRExLqI2AwsBeaUGLcIWEwS9ABExF8i4q7idWZmAPvtl/T1N25Mgr/4amernCyhPxYo/jvcma7bStI0YHxELC9jbWbW4A46KOnxr1sHxx4Lf/lL3hU1viyhrxLrtl5XJ2kIcBlwzo4WIWm+pDZJbV1dXTv6bcysDs2YAdddB/fcAyeeCK+8kndFjS1L6HcC44uWxwFPFy2PAqYCd0h6HDgUaO3+MDeLiFgSEYWIKLS0tGTdzMwaxIc+BFddlXzA+7GPwWuv5V1R48pycda9wGRJk4CngLnAyd1PRsRGYHT3sqQ7gHMjoq28pZpZI/vkJ5PpGi68MLlq99JLt5+v3wav39CPiC2SFgArgaHA1RHRLukSoC0iWvvaPj363w0YLumDwMyIeGTwpZtZo/nnf06ma/j612HvveH88/OuqPFkmoYhIlYAK3qsu7iXsTN6LE/cwdrMrMlIcPnl0NUFn/98Ml3DRz+ad1WNxXPvmFlNGTIkuR/x888nLZ/Ro+G44/KuqnF4GgYzqznDh8OPf5zciOWkk+Cuu/KuqHE49M2sJu26K9x0E+y7b3IO/0MP5V1RY3Dom1nNamlJ7q+7667JVbuPP553RfXPoW9mNW3ffZPpGv76V5g5Mzmt03acQ9/Mat7UqbB8OXR2JjNz/vnPeVdUvxz6ZlYXDjsMfvQjuP/+5Arel1/Ou6L65NA3s7pxzDFw9dVw221wyinw6qt5V1R/fJ6+mdWVU05J+vrnnZd80PvNb3q6hoFw6JtZ3Tn33GS6hksvTaZruOiivCuqHw59M6tLX/taMl3DxRcnR/ynn553RfXBoW9mdWnIEPjud2H9ejjjjCT4jz8+76pqnz/INbO6tdNOsGwZvPvdcPLJcPvteVdU+xz6ZlbXRo6En/0MJk+GOXPgvvvyrqi2OfTNrO7tuWcyXcMee8CsWdDRkXdFtcuhb2YNYexYuPXW5FaLM2fCM8/kXVFtcuibWcN461thxYrkPP6jjoKNG/OuqPZkCn1JsyStldQh
6YI+xp0gKYpvii7pC+l2ayUdWY6izcx6c8gh8JOfwCOPwOzZ8NJLeVdUW/oNfUlDgSuBo4ApwDxJU0qMGwUsBFYVrZtCciP1A4BZwLfT72dmVjEzZyZ33/rVr2DePNiyJe+KakeWI/3pQEdErIuIzcBSYE6JcYuAxUDx39U5wNKIeDkifgd0pN/PzKyi5s2Db3wDfvpT+PSnISLvimpDltAfCzxZtNyZrttK0jRgfEQsH+i2ZmaV8tnPwhe/CN/7nqdq6JblitxSUxlt/ZspaQhwGXDaQLct+h7zgfkAEyZMyFCSmVk2ixYlH+x++cswZgwsXJh3RfnKEvqdwPii5XHA00XLo4CpwB1KprrbB2iVNDvDtgBExBJgCUChUPCbMDMrGwm+851kuoYzz4TRo5Ord5tVlvbOvcBkSZMkDSf5YLa1+8mI2BgRoyNiYkRMBO4GZkdEWzpurqSdJU0CJgP3lP2nMDPrw9ChcO218N73wqmnJhdyNat+Qz8itgALgJXAGmBZRLRLuiQ9mu9r23ZgGfAIcAvwmYjwbQ/MrOpGjIAbb4QDDkgmZrunSQ8/FTX2kXahUIi2tra8yzCzBvWHPyS3Xty4Ee66C972trwrKg9JqyOi0N84X5FrZk1ln32S6RqGDUvO5+/szLui6nLom1nT2W8/uPlm+OMf4cgj4fnn866oehz6ZtaUpk2D1tZkRs5jj4VNm/KuqDoc+mbWtGbMgOuug1Wr4MQT4ZVX8q6o8hz6ZtbUPvzh5Dz+FSvg4x9PpmZuZL5Hrpk1vfnzk6t2L7oouWr30kvzrqhyHPpmZiRz9Dz7LPz7v8Pee8N55+VdUWU49M3MSKZr+MY3oKsLzj8fWlrgtNPyrqr8HPpmZqkhQ+AHP0hO4fzEJ2CvveC44/Kuqrz8Qa6ZWZHhw+HHP05O6TzpJPj1r/OuqLwc+mZmPYwalZzNM2FCcg7/ww/nXVH5OPTNzEpoaUmmaxg5Mrlq9/e/z7ui8nDom5n1Yt99k2mYN21K5unp6sq7osFz6JuZ9WHqVFi+HJ54Ao4+Gv7857wrGhyHvplZPw47DH70I7jvvuQK3s2b865oxzn0zcwyOPbY5Abrv/gFnHJK/U7X4PP0zcwyOu20bS/euuKK5KKuepLpSF/SLElrJXVIuqDE86dLekjS/ZLukjQlXT9c0vfT5x6QNKPM9ZuZVdV558E558C3vgVf/nLe1Qxcv0f6koYCVwIfADqBeyW1RsQjRcOujYir0vGzga8Ds4BPAkTEOySNAW6WdEhE1OkbIzMzWLw4OeLvnqBt/vy8K8ouy5H+dKAjItZFxGZgKTCneEBE/KlocReg+8a7U4Db0jHPAX8E+r2Ho5lZLRsyJOnvH3MMfPrT8JOf5F1RdllCfyzwZNFyZ7puG5I+I+kxYDGwMF39ADBH0jBJk4C/BcYPrmQzs/zttBMsWwbvehfMmwe33553RdlkCf1SH1PEdisiroyI/YDPAxemq68m+SPRBlwO/F9gy3YvIM2X1CaprasRrn4ws6YwcmRyDv/++8OcOckpnbUuS+h3su3R+Tjg6T7GLwU+CBARWyLi7Ig4KCLmALsDj/bcICKWREQhIgotLS3Zqzczy9meeyZX7e6+Oxx1FDz2WN4V9S1L6N8LTJY0SdJwYC7QWjxA0uSixWNIg13SSEm7pF9/ANjS4wNgM7O6N25cMk/Pli3JdA1/+EPeFfWu39CPiC3AAmAlsAZYFhHtki5Jz9QBWCCpXdL9wOeAU9P1Y4DfSlpD0vb5p7L/BGZmNeBtb0tm5nz2WZg1CzZuzLui0hSxXXs+V4VCIdra2vIuw8xsh9x6a3JWz2GHwS23wIgR1XldSasjot+zIz0Ng5lZGc2cmdx968474eST4dVX865oWw59M7Mymzcvud/uDTck5/HXUkPFc++YmVXAwoVJf/8rX4G994ZFi/KuKOHQNzOrkH/5F3juueTfMWPgs5/NuyKHvplZxUjwne/A
+vVw5pnJzJxz5+Zbk3v6ZmYVNGwYXHcdHH54Mg//rbfmW49D38yswkaMgBtvhClTkjtv3XNPfrU49M3MquCNb4Sbb056+0cfDWvX5lOHQ9/MrEre9KakvTN0aHI+/1NPVb8Gh76ZWRXtv39ype4LL8CRR8Lzz1f39R36ZmZVNm1a0uN/9FE47jjYtKl6r+3QNzPLwRFHwLXXwm9+AyedBK+8Up3XdeibmeXk+OOT8/hvugk++cnqTNfgi7PMzHL0qU8lV+1efHFyZs/ixZV9PYe+mVnOLrwQNmyAyZP7HztYDn0zs5xJcPnl1Xkt9/TNzJpIptCXNEvSWkkdki4o8fzpkh6SdL+kuyRNSdfvJOma9Lk1kr5Q7h/AzMyy6zf0JQ0FrgSOAqYA87pDvci1EfGOiDgIWAx8PV1/IrBzRLwD+FvgU5Imlql2MzMboCxH+tOBjohYFxGbgaXAnOIBEfGnosVdgO4TjwLYRdIw4A3AZqB4rJmZVVGWD3LHAk8WLXcC7+o5SNJngM8Bw4H3pauvJ/kD8QwwEjg7Iqp80bGZmXXLcqSvEuu2u4QgIq6MiP2AzwMXpqunA68CbwYmAedIest2LyDNl9Qmqa2rqytz8WZmNjBZQr8TGF+0PA54uo/xS4EPpl+fDNwSEa9ExHPAr4FCzw0iYklEFCKi0NLSkq1yMzMbsCyhfy8wWdIkScOBuUBr8QBJxZcUHAM8mn79BPA+JXYBDgX+e/Blm5nZjlBkmOxB0tHA5cBQ4OqI+LKkS4C2iGiV9A3gH4BXgBeABRHRLmlX4PskZ/0I+H5E/Fs/r9UF/H4QP9NoYP0gtq8U1zUwrmtgXNfANGJd+0ZEv62STKFfTyS1RcR2LaS8ua6BcV0D47oGppnr8hW5ZmZNxKFvZtZEGjH0l+RdQC9c18C4roFxXQPTtHU1XE/fzMx614hH+mZm1ou6DH1JZ0tql/SwpOskjejx/M6S/iudFXRVtSZ5y1DXaZK60tlI75f0iSrVdWZaU7uks0o8L0lXpPvrQUkH10hdMyRtLNpfF1ewlqslPSfp4aJ1e0r6uaRH03/36GXbU9Mxj0o6tYbqerVo37WWGlPmuk5M/1u+JqnXM1D6m7U3x7oeL5otuK0Kdf2bpP9O/5+7QdLuvWxb3v0VEXX1IJkL6HfAG9LlZcBpPcacAVyVfj0X+K8aqes04FtV3l9TgYdJ5j4aBvwCmNxjzNHAzSTXUhwKrKqRumYAy6u0nw4HDgYeLlq3GLgg/foC4GslttsTWJf+u0f69R5515U+92KV99fbgbcCdwCFXrYbCjwGvIVknq4HgCl515WOexwYXcX9NRMYln79tV5+v8q+v+rySJ8kJN6Qzt45ku2nhZgDXJN+fT3wfkml5hCqdl15eDtwd0RsiogtwJ3Ah3qMmQP8IBJ3A7tLelMN1FU1EfF/gJ6TARb/Hl3D69OLFDsS+HlEPB8RLwA/B2bVQF0VVaquiFgTEWv72bTfWXtzqquieqnr1vR3H+Bukilueir7/qq70I+Ip4BLSaZ4eAbYGBG39hi2dWbQdKduBPaqgboAjk/fzl0vaXyJ58vtYeBwSXtJGklyVN/zdUvNpDq2BuoCeLekByTdLOmACtfU094R8QxA+u+YEmPy2HdZ6gIYoWQiw7slVf0PQy/y2F9ZBXCrpNWS5lf5tT9G8m67p7Lvr7oL/bR/OYdk1s43k8zX/z96DiuxaUVPU8pY18+AiRFxIEk74xoqLCLWkLx1/DlwC8nbwy09hlV9f2Ws67ckl5a/E/gm8NNK1rSDqr7vBmBCJFd3ngxcLmm/vAuitvfXYRFxMMkNoz4j6fBqvKikL5L87v+w1NMl1g1qf9Vd6JPM8fO7iOiKiFeAnwB/12PM1plB01bLG9n+LXLV64qIDRHxcrr4XZK7iVVcRPyviDg4Ig4n2Q+P9hgy0JlUq1JXRPwpIl5Mv14B7CRpdKXrKvJsd5sr/fe5EmPy2HdZ6iIi
nk7/XUfSz55W4bqyyOV3LYui/fUccANJa6Wi0g/+jwU+EmkTv4ey7696DP0ngEMljUz79O8H1vQY0wp0n0VxAvDLXnZoVevq0Sef3fP5SpE0Jv13AvBh4LoeQ1qBU9KzeA4laU09k3ddkvbp/ixG0nSS39cNla6rSPHv0anAjSXGrARmStojfbc3M12Xa11pPTunX48GDgMeqXBdWfQ7a28eJO0iaVT31yT/HR/ue6tBv+YskvuPzI6ITb0MK//+qsQn1ZV+AF8imaL5YeA/gZ2BS9KdBzAC+BHQAdwDvKVG6voq0E7SyrgdeFuV6voVyf/wDwDvT9edDpyefi2S+yA/BjxEH2c4VLmuBUX7627g7ypYy3Ukn8W8QnJ09XGSz4FuI3kHchuwZzq2AHyvaNuPpb9rHcBHa6EukneZD6X77iHg41Wo60Pp1y8DzwIr07FvBlYUbXs08P/S37cv1kJdJGfHPJA+2qtUVwdJv/7+9HFVz7oqsb98Ra6ZWROpx/aOmZntIIe+mVkTceibmTURh76ZWRNx6JuZNRGHvplZE3Hom5k1EYe+mVkT+f+AiMrvSPucjgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1c7fee81400>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "#K=9时，聚类效果最好0.437 但是小于 0.5，好像还没有啥效果\n",
    "# 绘制不同PCA维数下模型的性能，找到最佳模型／参数（分数最高）\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "plt.plot(Ks, np.array(CH_scores), 'b-')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYwAAAD8CAYAAABkbJM/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAHKlJREFUeJzt3X90XPV55/H3I8k2yGDwr7aAsQUthHWc1rA6JqSUmEACod044bQbOEPw5peKgV1vuqcNrM7Z0/6hnpPstg3ZYIOSbeoUpcAm2bUXaL1AMAk5BEccArFjIMJYxmtCZDsYkI1tSc/+ce/Io9EdzZVmru69o8/rnDma+d7vjL6Xa+bR99dzzd0RERGppintBoiISD4oYIiISCwKGCIiEosChoiIxKKAISIisShgiIhILAoYIiISiwKGiIjEooAhIiKxtKTdgLgWLVrkbW1taTdDRCQ3nn322QPuvrhen5ebgNHW1kZvb2/azRARyQ0z66/n52lISkREYlHAEBGRWBQwREQkFgUMERGJRQFDRERiUcAQEZFYFDBERCQWBQwREYlFAUNERGJRwBARkVgUMEREJBYFDBERiUUBow563niDtqefpmnbNtqefpqeN95Iu0kiInWngFGjnjfe4NO7dtF/7BgO9B87xk27dnHOj36UdtNEROpKAaNG619+mRMR5ftPnKBl2zb1NkSkYShg1Ojg8HDFY8PAp3ftUtAQkYaggJGwE8BNzz/NaX99Gj0/60m7OSIiU6aAMR1mncHgiUFu+t5N3PrwrWm3RkRkShQwptnG3o3YXxltX2lTj0NEckUBo0ZXnXlm9UonDo8r6j/cz2c2f0ZBQ0RyoyXtBuTdYytXcvVPf8rjb74ZXWFkCPq+NvpyycFLuHjPv6XF5wDw3WcGeHjO17jl9qu5YvVF09FkEZEpUcCog8dWrgSCPRnrX36Zg0NDwYETh4NgMfA4EASL9lcLNNE85v3Hjg3z1b/dCqCgISKZZe6edhtiaW9v997e3rSbEctpf30agycGx5Vf88J/Ye7xBRO+d9Hi0ync/AEFDhGpmZk96+7t9fq81OYwzOxaM3vJzPrM7I602pGEe//NvZHlrcfnV33vgYG3uetvttK98fv1bpaISE1SCRhm1gzcDXwUWA7caGbL02hLEgrvK7Cufd24cid+b27rIz/jB9terGezRERqklYPYxXQ5+673f04cD+wJqW2JGLDH27gvuvvY+GpC0fLDJvUZ/zjxh/yw0U/ZJttY5tt46lFT/FGj3aNi0g60goY5wCvlbzeF5Y1lML7Chz4iwORvY04Dg0eYfjgydQjQweHePEzL/K9W9+grQ2amqCtDXq0MldEpkFaASPqT+1x4zVm1mFmvWbWOzAwMA3NSkaxt3Gi5cik3nf60Vnjyvy4c3zjbvr7wR36+6GjQ0FDRJKXVsDYB5xb8noJsL+8krt3u3u7u7cvXrx42hqXhML7Cvz5+utpaYn5n9zhshd/K/LQb3BszOsjR6Czs9YWiohMLK2A8RPgAjM7z8xmAzcAW1Jqy7S5YvVF3Lb+w/EqTzA//lbZPg4IehrqZYhIklIJGO4+BNwObAV2AQ+6+8402jLdrlh9Eddc977qFZvg6Yt+GXmolRGuYvzkt4amRCRJqe3DcPdH3P1Cd/9td+9Kqx1p6Fj3oVhB4+1To27NBLNxbucX48o1NCUiSVLywZR0rPsQ3/0/6yesEzXpXXQGQ5G9jL17a26aiEgkBYyULVp8evSBCSa9IVhm9jl2j3+bQ0sL3KrbbohInSlgpKxw8weYM6csB6TDij0LeM/+ifNOla+WKhoeho0bFTREpL4UMFJ2xeqLuOX2q0Z7GqcfmcVHnjuXK3eeW+WdEy6kAqC7uw4NFBEJKb15Blyx+qLR7LQv3/oy+58ctyUlUrVoPzxcpYKIyCSoh5ExF264kNVDq4nYajEl
WmYrIvWigJFRZ3ecXZfP0d4MEakXBYyMunDDhVXrHK7SDVlzDjz2+/AHD8Jr18NrH4f9n4fBJ+vVShGZSTSHkVPHga9ROaisOQe+tBJai1d4JPgxPAC/3hA8n/vB4Ofgk3D4Phg+AM2L4IybTh4TESlSDyPDTl1+6rgyJwgWX+Zf8Ti/WfG9X1xeEizKP+NYECAgCBa/3hAEEvxkQFEvRETKKWBk2KU7Lx0TNBx4rflUrmE137fKwQLg7PGxZozhA8HPw/cFAaRUaUARESnSkFTGXbrz0nFlNxNMZK9fDwcPRr9v/1FY0jrBBxu89gkqbuYoBhQRkSJzj3+f6TS1t7d7b29v2s3InEWLooPGZ38P/vI943sPce1u3s5zczYz6IeYt2gBV35yDSsuX1VbY0VkWpnZs+7eXq/PUw8j5+66K1g6e6TkZn6trXDln8P8JeFk9gDB4OMIQRKqKn8j7G7eztOzehj24wC8deAQD389WJs71aCx46ntPPHAZt46oAAkkleaw8i5QiFIAbJsGZgFP7u7g/K5HwxWPDUvBhzsdKrnEwGea9nMsB0fUzZ0/DhPPLB5Sm3c8dR2Hv56D28dOAScDEA7nto+pc8TkXSoh9EACoXgUe7QPTD4Lydf+9vxPm/QDkWWv3UwuryaJx7YzNDx6ACkXoZIfqiH0aAGnxwbLCZjrkdnyT117twpfV6xZzGufIoBSETSoYDRoN78xtTfe/HQGpp8/C7yd4++O6VhpHmLogPQvIUTp28XkWxRwGhQIzGHn6KcP7yKFj9lXLkPD09pHuPKT66hZfbsMWUts2dz5SfXTLmNIjL9NIchzP5dOP5zYOhk2XEbjKw7lWGk4jzFEw9s5q2Dh5i3UKukRPJIAaNB2enxJ7mHXgU7dWz9ub4gcvJ7qsNIKy5fpQAhknMakmpQ8z9H7HtqjLw9PrhcPLSGZtcwkoicpB5Ggypmmz30d1N7//nDQW/guTmbGeTkMBLAf//3ndqAJzIDKTVIg9v/+XCn9yTtbt7Oc7M2M2gnAwPAw1/vGbOnwpqbOeXUUzj6zqACiEjGKDWITMoZNwXpyieTU2o0NYiNTQ3SMnvWuA14PjzM0XcGx9SDqacQEZHs0hxGg5v7QZh/a5gexKDp9DBFSAW7m7fzo1mbIlODvPtO9Mqp8npTTSEiItmmHsYMMPeD4++gV7xxUmnPo9izcBup6fdpB7dIY1LAmKGKAWQ0my3RSQfHMIMYc15TTSEiItmmgDGDlfY8Bp+EwXuq9AxiLpDIxzIKEZkszWEIEASOSjmfiqwp3j+Xdwerz3WISP4oYMioqJxPpXxkBGuuvhtwMrvBe3p6aGtro6mpiba2Nnp6emK/V0Sml4akZFRxKeyWjZvwkfET36ecNheD0WW0USazG7ynp4eOjg6OhLcL7O/vp6OjA4BC1A0+RCRV2rgn4xTvkFe+Qa/JYHhoeEzdZSvew69/OTCppILF27UeHjjIm0fe4dHnn+GFvX0nP3PZMvbs2VPXcxKZibRxTxIXlV32xLvHInsW/TteYs1tn469Ua80GJkZ8+eezsdXBTPvxaCxd+/eOp2JiNSTAoZEKs8u23Xjuop1t2zcNPqeKMUeRaU7781umcWHf+/S0YCxdOnSqTZbRBKkgCGxzFu0oOIXvo+MsPnub7L57m+OyycVNbwV5czW0wCYNWsWXV1d9W28iNSFVklJLHEnst86cIiH7v3W6K1cn3hgc9VgAfDmkXcAmDdvnia8RTJKAUNiWXH5Ki758BWx6g4PDbNlwz+w46ntFXslpY4PneDR558B4NChQ1pqK5JRia2SMrO/BD4PFJNr/2d3fyQ8difwWWAY+A/uvrXa52mVVDbseGp7xWW35Vpmz6Zl9qwJkxYOj4zw3R9/f3T+YuHChRw9enR0qS1Aa2sr3d3d6nmITFK9V0kl3cP4O3dfGT6KwWI5cAPwXuBaYIOZxbw3nKRtxeWr+Ni6tRNu8CsaOn4c
g4p1jw8NjQkWra2tAGOCRfF1Z2dnbQ0XkZqlMSS1Brjf3Y+5+6tAH6CbJ+TIistX8YefL1RNJQJwdHBwTN1iepF5ixZwdvtyDtsJzIxly5bR3d3NoUPRQ1haaiuSvqRXSd1uZjcDvcB/cvdfA+cAPy6psy8skxwpLrvd8dR2ttzzLXx4OLLevIULxi3RLfWnX/zCmNednZ309/ePq6eltiLpq6mHYWaPmdmOiMcaYCPw28BK4HXgb4pvi/ioyIkUM+sws14z6x0YmMJ9RiVxKy5fxcduuZlTThuf0nwyaUIgSBXyzjvvjCtvbW3VUluRDKiph+HuV8epZ2ZfBx4KX+4Dzi05vATYX+Hzu4FuCCa9p95SSVJpb6N0d/hk7u9dnleqaOHChdx1112a8BbJgCRXSZ3l7q+Hz78AXOruN5jZe4FvE8xbnA08Dlzg7tFjGiGtkmpsbW1tkUNRyislMnV5yiX1ZTNbSTDctAf4UwB332lmDwI/B4aA26oFC2l8lSa1Ndktkh2JBQx3/9QEx7oADUrLqKVLl2qyWyTjtNNbMqGrq2t0H0aRmdHf3x+527u4G9zMaGlpwcy0K1wkYQoYkgmFQoHu7m6WLVsGBMGiOL9WvLFSMRgUJ8iLPZLhcElveT0RqS/dQEkyp9oEeKXj5fVEZrq8pQYRiaU04WClYFCcAK82Ea6JcpFkKGBI6kqHmCbq8RYnwKtNhGuiXCQZChiSus7OznEb9sqV7vaOmiCPqici9aWAIambaD6iNDFhcbd3+QR5c3OQ7Li8nojUlya9JXUtLS2jK53K3XfffQoAIlOkSW9pOJWCBaBlsiIZooAhqSsOLUXRzZNEskMBQ1I30SQ2UHG3t4hMLwUMSV1xErs4eR0liV3cpXs/FJBEqlPAkEwoFAps2rRpwp5GPYenyvd+KK2ISHVaJSWZ0tPTU/E2rRAssx0ZGan59+j+GzITaJWUNLRCocCePXsqToTXaxe37r8hMnkKGJJJURPh9dzFXSnwKK2ISGUKGJJJpbu5o3Z71yrpgCTSiBQwJLOKw1MjIyPs2bNnXLCoZZVT0gFJpBFp0ltyqbjKqTRpYWtrq770RUpo0luE6Ay3R44cYe3atdpXIZKQlrQbIDIVlZbdlt+uFVCPQ6RO1MOQXJpoV3iR8lCJ1JcChuTSRBluS2lfhUj9KGBILk2U4baU9lWI1I8ChuRStQy3oH0VIvWmgCG5FLWPYt26ddpXIZIg7cMQSVJPD3R2wt69sHQpdHWBgphMk3rvw9CyWpGk9PRARwcU94v09wevQUFDcklDUiJJ6ew8GSyKjhwJykVySAFDJCmVlvRqqa/klAKGSFIqLemtdalvTw+0tUFTU/BTKVBkmihgyIyX2L29u7qgfOlva2tQPlXFeZH+fnA/OS+ioCHTQAFDcqseX/SJ3tu7UIDubli2DMyCn93dtU14a15EUqRltZJL9Upvnrt7ezc1BT2LcmZQh3udS2Op97JaBQzJpXp90Tc1NRH1/4CZMZLFL+C2tmAYqtyyZZDFACep0v0wRKicVHCyyQZzd2/vJOZFRGJSwJBcqtcXfe7u7Z3EvIhITAoYkkv1+qLP5b29C4Vg+GlkJPiZ5bZKQ1HAkFyq5xd9oVBgz549jIyMsGfPnjGfkdiSW5EcqilgmNmfmNlOMxsxs/ayY3eaWZ+ZvWRm15SUXxuW9ZnZHbX8fpnZJvqir0UxSJgZn/rUp5JZciuSQ7X2MHYA1wM/KC00s+XADcB7gWuBDWbWbGbNwN3AR4HlwI1hXZFMKN2XAYxbQZX4bV+1i1syrKZste6+C4IliGXWAPe7+zHgVTPrA1aFx/rcfXf4vvvDuj+vpR0i9dLZ2Tlmb0eUqOW8daHstpJxSc1hnAO8VvJ6X1hWqTySmXWYWa+Z9Q4MDCTSUJFScZblmlkyw1LTsYtbPRipQdWAYWaPmdmOiMeaid4WUeYTlEdy9253b3f39sWLF1drqkjN4izL
dfdkhqWSzm6rPFRSo6oBw92vdvcVEY/NE7xtH3BuyeslwP4JykUyIc69wmHyGwRjSSq7bZHyUEmNkhqS2gLcYGZzzOw84AJgO/AT4AIzO8/MZhNMjG9JqA0ik1a+XLe5uTmyXiI7wZPexa37c0iNal1W+wkz2wdcBjxsZlsB3H0n8CDBZPa/ALe5+7C7DwG3A1uBXcCDYV2RzChdrrtp06bp2wme9C7upHsw0vCUfFCkip6eHjo7O9m7dy9Lly6lq6sr2zvBKylfhQVBD0apRRqWstWKyNT19ARzFnv3Bj2Lri4FiwambLUieZDV5avKQyU1qGnjnohE0AY8aVDqYYjUW6Xlq2vXZqenITIFChgi9VZpmerwsDbKSa4pYIjU20TLVLVRTnJMAUOk3qI24JXq71cvQ3JJk94i9Vac2F67NhiGiqJJcMkh9TBEklAowKZNlXsaR47A+vXZXHorUoF6GCJJKfYebrop+vjBg8EDtPRWckE9DJEkFQpBTqg4NCEuGaeAIZK0apPgpZQ5VjJMAUMkaVFZaBcujK6rzLGSYQoYItOhPIfTXXcle+8LkQQoYIikIel7X4gkQAFDJC2VMsdmNdOtzHhaViuSJcp0KxmmHoZIllTKdKvltpIBChgiWVJpWa2W20oGKGCIZEmlZbVabisZoIAhkiVRm/y03FYyQgFDJEu03FYyTKukRLKmUFCAkExSD0MkL7Q/Q1KmHoZIHmh/hmSAehgieZDl/Rnq+cwY6mGI5EFW92eo5zOjqIchkgdZ3Z+R5Z6P1J0Chkge1Lo/I6lho6z2fCQRChgieVDL/ozisFF/P7ifHDaqR9DIas9HEmHunnYbYmlvb/fe3t60myGSP21tQZAot2xZkFa9FuVzGBD0fLTZMBPM7Fl3b6/X56mHIdLokhw20s70GUWrpEQa3dKl0T2Meg0baWf6jKEehkijU0JDqRMFDJFGl6dhI20CzDQNSYnMBHkYNtImwMxTD0NEskGbADNPAUOk0eR1WEebADOvpoBhZn9iZjvNbMTM2kvK28zsqJn9NHzcU3LsX5vZz8ysz8y+amZWSxtEpESSm/SSpk2AmVdrD2MHcD3wg4hjr7j7yvBxS0n5RqADuCB8XFtjG0SkKM/DOlrNlXk1BQx33+XuL8Wtb2ZnAfPc/WkPtph/C/h4LW0QkRJ5HtbJ02quGSrJOYzzzOw5M3vSzP4gLDsH2FdSZ19YJiL1kPdhnUIhSFcyMhL8VLDIlKoBw8weM7MdEY81E7ztdWCpu18M/BnwbTObB0TNV1RMZmVmHWbWa2a9AwMD1ZoqIhrWkQRV3Yfh7ldP9kPd/RhwLHz+rJm9AlxI0KNYUlJ1CbB/gs/pBrohSD442XaIzDjFv8g7O4NhqKVLg2Chv9SlDhIZkjKzxWbWHD4/n2Bye7e7vw68bWbvD1dH3QxsTqINIjNWUsM6eV2uK3VT67LaT5jZPuAy4GEz2xoeugJ4wcyeB74D3OLuh8Jj64BvAH3AK8A/19IGEZkGeV6uK3Wj+2GISHVJ3lNDEqP7YYjI9Mvzcl2pGwUMEaku78t1pS4UMESkOi3XFRQwRCQO7cIWdD8MEYkrD/fUkESphyEiIrEoYIhIPmkj4bTTkJSI5I9u55oK9TBEJH/yfN+PHFPAEJH80UbCVChgiEj+aCNhKhQwRKS+pmMyWhsJU6GAISL1M11ZbbWRMBXKVisi9aOstpmibLUikl2ajG5oChgiUj+ajG5oChgiUj+ajG5oChgiUj+ajG5oSg0iIvWlrLYNSz0MERGJRQFDRERiUcAQEZFYFDBERCQWBQwREYlFAUNEskd308skLasVkWzR3fQySz0MEckW3U0vsxQwRCRblMAwsxQwRCRblMAwsxQwRCRblMAwsxQwRCRblMAws7RKSkSyRwkMM0k9DBERiUUBQ0REYlHAEBGRWBQwREQkFgUMERGJRQFDRERiqSlgmNl/NbMXzewFM/tfZnZmybE7
zazPzF4ys2tKyq8Ny/rM7I5afr+IiEyfWnsYjwIr3P13gZeBOwHMbDlwA/Be4Fpgg5k1m1kzcDfwUWA5cGNYV0REMq6mgOHu/9fdh8KXPwaWhM/XAPe7+zF3fxXoA1aFjz533+3ux4H7w7oiIpJx9ZzD+Azwz+Hzc4DXSo7tC8sqlYuISMZVTQ1iZo8BvxVxqNPdN4d1OoEhoHhbLIuo70QHKJ/gd3cA4Z1TOGZmO6q1N6cWAQfSbkSCdH75pvPLr/fU88OqBgx3v3qi42a2Fvgj4Cp3L3757wPOLam2BNgfPq9UHvW7u4Hu8Pf0unt7tfbmUSOfG+j88k7nl19m1lvPz6t1ldS1wBeBj7l76S2ytgA3mNkcMzsPuADYDvwEuMDMzjOz2QQT41tqaYOIiEyPWrPVfg2YAzxqZgA/dvdb3H2nmT0I/JxgqOo2dx8GMLPbga1AM/D37r6zxjaIiMg0qClguPvvTHCsCxh3xxN3fwR4ZAq/rnsK78mLRj430Pnlnc4vv+p6bnZy2kFERKQypQYREZFYMhcwZlq6kTy3HcDMzjWzJ8xsl5ntNLP1YfkCM3vUzH4R/pwflpuZfTU83xfM7JJ0zyCeMFPBc2b2UPj6PDN7Jjy/B8JFHIQLPR4Iz+8ZM2tLs91xmNmZZvad8P+7XWZ2WSNdPzP7Qvhvc4eZ/ZOZnZLn62dmf29mvyrdZjCV62Vma8P6vwhXu1bn7pl6AB8BWsLnXwK+FD5fDjxPMMl+HvAKwcR5c/j8fGB2WGd52ucR81xz2/aSczgLuCR8fjpBipjlwJeBO8LyO0qu43UEGzwNeD/wTNrnEPM8/wz4NvBQ+PpB4Ibw+T3AuvD5rcA94fMbgAfSbnuMc9sEfC58Phs4s1GuH8HG4FeBU0uu27/L8/UDrgAuAXaUlE3qegELgN3hz/nh8/lVf3faJ1/lP8wngJ7w+Z3AnSXHtgKXhY+tJeVj6mX5kee2T3BOm4EPAy8BZ4VlZwEvhc/vBW4sqT9aL6sPgv1CjwMfAh4K/+c7wMk/bEavY/HfZfi8JaxnaZ/DBOc2L/xCtbLyhrh+nMwusSC8Hg8B1+T9+gFtZQFjUtcLuBG4t6R8TL1Kj8wNSZVp9HQjeW77OGH3/WLgGeA33f11gPDnb4TV8njOXwH+AhgJXy8E3vSTedRKz2H0/MLjh8P6WXU+MAB8Mxxy+4aZzaVBrp+7/z/gvwF7gdcJrsezNM71K5rs9ZrSdUwlYJjZY+F4YvljTUmduOlGKpXnQZ7bPoaZnQZ8F/iP7v7WRFUjyjJ7zmb2R8Cv3P3Z0uKIqh7jWBa1EAxvbHT3i4FBgiGNSnJ1fuFY/hqCYeyzgbkE2bLL5fX6VVPX781aN+5NiaeYbiRjJjqn3DCzWQTBosfdvxcWv2FmZ7n762Z2FvCrsDxv5/z7wMfM7DrgFIIhnK8AZ5pZS/hXaOk5FM9vn5m1AGcAh6a/2bHtA/a5+zPh6+8QBIxGuX5XA6+6+wCAmX0P+ACNc/2KJnu99gGry8q3VfslmRuSspmVbiTPbQeCVRjA/wB2ufvflhzaAhRXXqwlmNsolt8crt54P3C42JXOIne/092XuHsbwfX5vrsXgCeAPw6rlZ9f8bz/OKyf2b9Q3f2XwGtmVkxSdxVBhoaGuH4EQ1HvN7PW8N9q8fwa4vqVmOz12gp8xMzmh72wj4RlE0t78iZiMqePYGztp+HjnpJjnQSril4CPlpSfh3B6pxXCLLopn4ekzjf3LY9bP/lBF3ZF0qu2XUE476PA78Ify4I6xvBTbReAX4GtKd9DpM419WcXCV1PsEfLH3A/wTmhOWnhK/7wuPnp93uGOe1EugNr+H/Jlg10zDXD/gr4EVgB/CPBCstc3v9gH8imI85QdBT+OxUrhfBHHFf+Ph0nN+tnd4iIhJL5oakREQkmxQwREQkFgUMERGJRQFDRERiUcAQEZFYFDBERCQWBQwREYlFAUNERGL5/4Pys0gh
DhC1AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1c787978e80>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot the clustering result, one colour per cluster.\n",
    "colors = ['b','g','r','k','c','m','#e24fff','#524C90','#845868']\n",
    "\n",
    "n_clusters = 9\n",
    "# Fix the seed so that cluster labels (and therefore colours) are identical on every run.\n",
    "mb_kmeans = MiniBatchKMeans(n_clusters = n_clusters, random_state = 42)\n",
    "mb_kmeans.fit(train_pca)\n",
    "\n",
    "# labels_ : cluster index assigned to each row passed to fit()\n",
    "# cluster_centers_ : array, [n_clusters, n_features], coordinates of the cluster centers\n",
    "train_pca_pred = mb_kmeans.labels_\n",
    "cents = mb_kmeans.cluster_centers_  # centroids\n",
    "\n",
    "for i in range(n_clusters):\n",
    "    # np.nonzero returns the indices of the rows whose label equals i\n",
    "    index = np.nonzero(train_pca_pred==i)[0]\n",
    "    x1 = train_pca[index,0]\n",
    "    x2 = train_pca[index,1]\n",
    "    # Draw only the first 20 members of each cluster (not a random sample), in one call.\n",
    "    plt.scatter(x1[:20],x2[:20],color=colors[i])\n",
    "\n",
    "# Fixed viewport: points outside these limits are not visible in the figure.\n",
    "plt.axis([-200,1000,-200,30])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#每类取前20个样本点打印的结果，看起来聚类还不错，哈哈"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYwAAAD8CAYAAABkbJM/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3X+YXFWd5/H3t6vTIR1ASCeDSEg1KMpkosOPbJQVMW4QEVwj7IzCVCQOaksijD/2eVaY3md33Gf6D2fVEdEAreOIpBWy6kwYwckYBBxcJIZFIBjQGFIhgjE/5EfSIZ10n/3j3tup7r636lbVrap7K5+XTz1VdevWrXOtUN8+53vO95pzDhERkUo6Wt0AERHJBgUMERGJRQFDRERiUcAQEZFYFDBERCQWBQwREYlFAUNERGJRwBARkVgUMEREJJbOVjcgrtmzZ7ve3t5WN0NEJDMeeeSR3c65OUkdLzMBo7e3l40bN7a6GSIimWFmxSSPpyEpERGJRQFDRERiUcAQEZFYFDBERCQWBQwREYlFAUNERGJRwBARkVgUMEREJBYFDBERiUUBQ0REYlHAEBGRWBQwREQklswUH0yroZ07+dDmzRwu2TZ/xgyefPObW9YmEZFGUA+jDkM7d7JsUrAA+OWBA9j997eiSSIiDaOAUYf+rVvLvm7338/Kn3+f3i/10vHZDnq/1MvQE0NNap2ISLI0JFWH7QcPVtzn5pdPgK7XAUWKLxbp+5c+AApvLDS4dSIiyVIPow7zpk+vvFNHB7zu2vGnw4eGWf5Py9XjEJHMUcCow8Dpp8fbcdqrJjwddaM43HiPQ0FDRLJAAaMOhZNOir/znCUTns7dcw6XPvq3vOuhAb7317tY/he38JP7n0q4hSIiyVHAqNOMODuZwekfGX86d885nLvtL5g+eizm/2/fywf58hfXKWiISGopYNRpePHieEFj+h8B8KZtl/MfnvkgOTd1voFz8A+D9yfaPhGRpChgJGB48WLc4sXMn1EmdBz8PW/adjmv3f02DIvcbd/LlWdeiYi0ggJGgp5885tZcsIJXleh1OgrsPXrnL77/LLBIqBhKRFJo5YFDDO72MyeNrMtZnZ9q9qRtPVnncVrdnwTXvkduDHv/unPw657YwULgBu/sI7Bm3/c2IaKiFSpJQHDzHLAV4F3A/OBK81sfiva0gi//eBtnPCLFfCTJfDwlbDr3qqPse7uJ7h17vd5qPchdg7tHN8+NAS9vd7yjt5e77mISDO0qoexCNjinNvqnBsB7gCWtqgtDfGH6//AktOWVN4xisFDb/gdB4sH2bxsM/fb/dzXcT//76pfUSx6o17FIvT1KWiISHO0KmCcAjxb8nyHv62trL9qPasvX830XIwV4SFennFownNz8J6x5/grfjW+bXgYPvGJupopIhJLqwJG2GC+m7KTWZ+ZbTSzjbt27WpCs5JXeGOBV/77K6xYuIKR3P6q3pubXAYX7/+4/8xzE7bt2aNehog0XqsCxg7g1JLnc2HSryDgnBt0zi10zi2cM2dO0xrXCKsuXcVbL381YxYSBSKMRpSGzIVs6++vrV0iInG1KmD8HDjDzE4zsy7gCuCuFrWlaW646sN86tOX1n2csZBtxaJ6GSLSWC0JGM65w8C1wDpgM7DGOfdkK9rSbBcsPpM3/uncuo7hgCXsnLJdCXARaaSWrcNwzt3jnHu9c+61zrmBVrWjFf7mb/8LHR3x1mSE6QQ+xdTFfcPDGpoSkcbRSu8Wue5TF5HL1R40unETZksFisV6WiUiEk0Bo0UuWHwm137youiehoO5u2ZGvj9sttT4a+bdgoV9+x+A5z4Kz17m3e9/oP72i8jRR5dobaELFp8JwC1fuZeDB0tmTzk48aUuLtvwurLvD5stVapYhLX/A956FuRGvW2ju+APq7zHM99eY8NF5KikgNFiQdAY+tb/Zffv
X+a4A9M476lX84bnZiVy/E+99kiwCLiD8OJqBQwRqY4CRgpcsPhMLlh8JjuHdvLUh57CHZ6yhrFqS0+Bz8yHUyIqro/urvsjROQooxxGipxUOIkzv3kmuZ5Kg03lLT0FPncWzO32chmhTLkMEamOAkbKnFQ4ibftfhvWWfsMqs/Mh+5KfccxL5ehoCEicSlgpNSZ3zyz4j4vRqS9XxPrmrFeLuMPX6+mVSJyNFPASKmTCieVfX0E+AqvD33thZH4n+Nehr23aNqtiFSmgJExDq+W1N/xx9xLeFCpNmW+/1+96bY4737v38Oz71PwEJGJFDBSbLFbDHgBoPS2hMWRwQLgxK5kPn90F+z9soKGiHg0rTblgqAxNORdKGnPnsrvee6AN0MqEaNenkNrNkREPYyMKBRg927v0qwrVpTf93O/9Parx9bcBr43vZ9vHbOC7470s+nBDfUdUEQyTz2MDFrll/YYHITR0amvr/0tfPaNMKu2K8OyNbeBh6YNMWpe9ny/7eXur3l10xecv6imY256cAP33bmWl3bv5fjZs3jHB5bWfCwRaQ31MDJq1So4fNjrSXSEfIv/84naexmPdq4dDxaBwyMj3Hfn2gnbNj24gZuu62fgyhXcdF10L2TTgxu4+2tDvLR7LwAv7fYCkHotItmigNEGPvax8O21Boz9tjd0e/CDD9UFgfvuXMvhkcoBSETSTQGjDaxaBUuWTNz22TeG9zzimOmiCx8GAaGaIFAaaCZs3xO+XUTSSQGjTaxfD6tXQz7v1Y+qZ2rt2YeXRi7m+Lfb1gDVBYHjZ4cHoON7kqnIKyLNoYDRRgoF2LYNxsbKFB2M4fTR6GT0gX37geqCwDs+sJTOrokRrLOri3d8YGntjRSRplPAEHKnTny+NVc5Gf26sxdM2RYVBBacv4hLP1rwgox5webSjxY0S0okYzSttk3ZcV6dqDhGn534fEPnGu8asCGOOXYmmx7cwOMP/GzKa296+1sig8CC8xcpQIhknHoYberEj1D5Gq4htuY2MGL7I1+ff965oQlvgC2Pbqr+A0UkM9TDaFNBKY+9f1/d+x7tXBvZuwB44t8f5tArB0Nf06wnkfamHkYbm/l2yM2p7j1RazACUcECwKyj5sV4Q0ND9Pb20tHRQW9vL0NDQzUdR0QaRz2MNveqZd6V9Vz07/wEM92sikEjihsbm1BCJG45kKGhIfr6+hgeHgagWCzS19cHQKFQqKktIpI89TDa3My3w4kr/Z6GARXqSx3n5lR/QY0SweK9alaC9/f3jweLwPDwMP39/bU3REQSZ67esqZNsnDhQrdx48ZWN6Mt7H8A9t4EHJ64fWtuAw9O+8eyOYx6HT97FtfdNDBhW0dHB2H/Ds2MsbGxxjVGpM2Z2SPOuYVJHU9DUkehICH+wtdhrGTqbaWEdxJKE+PBkNX/en8fLwzv40ePPczj27eMvz5v3rzGNkZEqqKAcZSa+fYjgSPocdSau6hGsBI8GLI6PDKCmXHizON43yKvQY9v30J3dzcDAwPlDiUiTaaAIeOBY+ats9jvGhc0SleCh63l6Oqcxjv/9M28aIcYGBhQwlskZRQwBPCCxoW5peN/9Sdt8iypqOKFJ8w8jm3btiX++SJSP82SknFBzackdXZ1sfTjf8l1Nw1MmFIbVbzwhf0vs3LlytjH1/oNkeZRwJAJFpy/iBnHzkzkWFFFBjc9uIGRVw5OmRk1cvgQP3rsYQYHB2MdP1i/USwWcc6Nr99Q0BBpDAUMmeKPzzu37mME02fDgsXdXxvilX37Mb8Gu3OO/a8c4J83PMDj27cwGnah8hBavyHSXMphyBRJFBEMmz4blbcwM0ZGD49Pqc3l4lVN3L59e1XbRaQ+6mHIFFE/7NWYPH220jFP6D52/HFQFqSSqHUaWr8h0hgKGDJFVEK6GsEFlqJKoU/2wvA+AGbOnMmqVatifcbAwADd3d0Ttmn9hkjjKGDIFGGXVK3Wo+v/nU0P
bojVWwmS3QD79++PPfOpUCgwODhIPp/HzMjn8wwODmr9hkiDNKyWlJn9DfBRYJe/6a+dc/f4r90AfBgYBf7KObeu0vFUS6q5xvMOe/Zi1oGroaaT5XJMn3EMr+yLviDT6NgY3/vZjyeUBJk2bRqHDh0af97d3a1AIFKDpGtJNTpg7HPOfX7S9vnAd4BFwGuA9cDrnXNlp8YoYLROaRmPak2b3oVzhL535PCh8ZlRleTzeS3oE6lS0gGjFUNSS4E7nHMHnXPPAFvwgoekVLCg7/jZs8C8HMc577wgVq7j0MGRI+8FrMP7J3f87FmxgwVo5pNIGjR6Wu21ZnYVsBH4r865PwCnAD8r2WeHv01SbMH5i0IvfrTpwQ3cdcu3cGXWTkS99wv/En+BnWY+ibReXT0MM1tvZptCbkuBm4HXAmcBzwNfCN4WcqjQcTEz6zOzjWa2cdeuXWG7SIstOH8R773mKrDwuujlVo2HzXIKo5lPIulQV8Bwzl3onFsQclvrnNvpnBt1zo0BX+PIsNMO4NSSw8wFnos4/qBzbqFzbuGcOVVenFqaZsH5i1i68kPkOicuuMt15rho+fsj31c6ywkYX/ldqqenJ7GEt+pOidSnYUNSZnayc+55/+llQLB8+C7g22b2Rbyk9xnA1Ot2SqYEQ07BzKrje6Kv4T35Wt8/WL2G9yx7P8Viccq+xx57bGLBQtcNF6lPI2dJ3Y43HOWAbcDHggBiZv3A1XgXCf2kc+6HlY6nWVLtIWzGVWdXF9954B4eK05NgCd1mdbe3t7QgKTZV9LOMnOJVufcB8u8NgBoUPooFLby+/DICBef/dbQgJFUslt1p0Tqp5Xe0hSbHtzATdf1R678Pu6YGVMS4GZGsViks7MTM5uQdwjyEWYWeps9e/aEHIXqTonUTwFDGi5OAcLjZ8+akgAPhkuDcudB3mHlypXj18GIsmfPHq6++urxoKG6UyL1U8CQhqtUgDC41nehUGDbtm3k8/kpF1cKDA8PMzg4OOU6GGFGRkbGr42hulMi9dP1MKThKvUs3vGBpTxW/DXvWfZ+tm/fHhksAnEvsAQTcxSFQkEBQqQO6mFIw0WVEAmuyvdY8dcTLrVaSdwLLIFyFCJJUsCQhgsrlx4MQ0H4pVajmBl9fX2xVoh3dXUpRyGSIAUMabiw4oWXfrQwvqivXPK6lJlxzTXXsGrVqgkJ8jA9PT184xvf0BCUSIIatnAvaVq41746Ozsj8xLTp09nZGSEefPmMTAwoAAgUoV2KG8uMkG5JPbBgwdxzrFjxw5++tOfJvq5k2tLrVy5UrWmRMpQD0NaLqpsR5gVK1bEvuZ3OZNrS4XRlf4k6zJzxb2kKWC0rzg/3oFcLsfhw4fr/sy4QUq1piTLMlNLSiSu4C/4ZcuWVdy3mjUY5cStIaVaUyJHKIchqVAoFFiyZEnTPi/OtFyAWbNmKa8h4lPAkNRYv359U4LG0NAQ+/fvr7hfV1cXL7300viCwqCWlYKGHK0UMCRV1q9fj3Muco1FPp+v+8p5QX2pKGZGT08Phw4d4tChQxNeGx4ervh+kXalgCGpFFVd9pJLLplQRqSWv/rL5SXy+Ty33347Bw4ciCxToryGHK0UMCSVoqrLrlmzZspsqmr/6o+qL2VmDAwMVCxVovpUcrRSwJDUCsqdj42NjU9t3bNnT+i+xWIxdi8jrPcSlB0pFAoVexD79u2rL48xNAS9vdDR4d0rJyJZ4ZzLxO3cc891cnTL5/MO7xrxobfu7m63evXqWMdavXq1y+fzzsxcPp+f8L5Kn1PtZ036YOe6u52DI7fubm+7SMKAjS7B32Et3JPM6OjoqFj+PJfLMTY2VlftqbgLCWta1NfbC2ELBvN50AJBSZhqSclRK07uYHR0tO4psJPzJ1FqSn5HvUeJdMkABQzJjLDcQzn1TIEtzZ9ETfGtKfkd9R4l0iUDFDAkM0r/8o9rci+gljUcUVN8a7o408AATA563d3e
dpG0SzIh0sibkt5SKk5iGnC5XG48Ob169WrX3d1dU/K6XJK8aqtXO5fPO2fm3SvhLQ2Ckt4i1VW4NTOcc+RyudDihapIK+1K1WpFOFLhdvny5RUr2AZ/FEXtp5XbIvEohyGZVSgUuO2226pKhIdJfOV26cK82bO9mxl0dnr3SS/W00JAaRL1MCTTgp5Gf38/27dvp6Ojo6prZtScvI4yNAR9fRAMlZWuTA/aVSx6+wDUezW/yZ+X5LFFJlEOQ9pKnNxGEov7IkUtzAuTxGI9LQSUMpTDECmjtMdRLBbHE96Bhl+nu5p8SNzAUsvnKS8jDaAchrSdYNGdc47bb799SsXbeoNFsJbDzOjs7MTMjqzpqCYfksvV2oAjOYuOiP+EtRBQGiHJObqNvGkdhjRKNWsswtZyBLdp06a51StWTC0uWO5WfWMrH1/FDMVHwuswWh4I4t4UMGSyJBbTVbuYr9KCwZ6enokL83p6vPuwH/Z8vvqTzufDj5XLaSGgTKGAIeLqW7VdKioA5CN+zM2s4urykMYmV9I8KviYVX8saXtJBwzlMCSTwq6KV0uxwahFe1Hba1qzUSjA4KA3c8nMux8crG3aq4oXSgspYEgmVftDHyUqAERtr1Qxt6enJ/yFQsGb5jo25t3XmnhvZPFCLQCUChQwJJOq/aGPUqkS7eTqtgCDg4OhgaGrq4sbb7yxqs+vWpK9lVLBAsBi0RvkChYAKmhIqSTHtxp5Uw5DSiWVwwiOFZY8r/QZiVawbbWoZHotiXlJDVStVsQzNDQ0XhKkEau2e3t7KYYsrmvL6rYdHV6ImMzMG0aTTErVJVrN7M/N7EkzGzOzhZNeu8HMtpjZ02b2rpLtF/vbtpjZ9fV8vhzdSq+Kt23btsSCRTAMFRYsgMjtmaZkusRQbw5jE3A58JPSjWY2H7gC+BPgYmCVmeXMLAd8FXg3MB+40t9XJBWCWlTlgkKu1hXa8RrQmsSzrgQoMdQVMJxzm51zT4e8tBS4wzl30Dn3DLAFWOTftjjntjrnRoA7/H1FUiFsuu5ksarh1vLD38rEc6OS6dJWGjVL6hTg2ZLnO/xtUdtDmVmfmW00s427du1qSENFSsWZlmtm5a8FXumHPyqY9PcfKVMeGB72tielXCBLauqvtK2K1WrNbD3w6pCX+p1za6PeFrLNER6gIrPuzrlBYBC8pHeFporUbd68eRVzFM45+vv7o3MmlX74o65f0ejKs7p2htSpYg/DOXehc25ByC0qWIDXczi15Plc4Lky20VSodLCvEDZnki5H/5ywaTRiedm9GCkrTVqSOou4Aozm25mpwFnABuAnwNnmNlpZtaFlxi/q0FtEKlaoVBgcHBwvCR6VIK77ALBcj/85YJJoxPPunaG1KneabWXmdkO4DzgbjNbB+CcexJYA/wS+Ffg4865UefcYeBaYB2wGVjj7yuSGqXTdcOuGV7xsq7lfvjLBZNGJ541dVbqleQqwEbetNJbWqXqFd2rV3tlzYPV0kHJ8+C1pCrXVn8irftsaQkSXumtS7SKVFAoFOIvCly5Em65ZeKq6QMHSg/m3ff3e0NB8+Z5PY9mJJ1b+dnSFlQaRCQpQ0PwwQ+Gl9jo6YHdu5vfJjmqpao0iIiU6O8PDxYAe/ao8qtkngKGSFIqzTZavlxBQzJNAUMkKZVmG42O6hoTkmkKGCJJGRiArq7y+2ihnGSYAoZIUgoFOO64yvsVi+plSCYpYIgkae/eePv19XlTcHUNbckQrcMQSdK8eV4PopLhYbj55iPPi0W4+mrvsdZFSEqphyGSpLCyIHGNjMAnPpFse0QSpIAhkqSwelA9PfHfv2ePhqgktRQwRJI2+UJEN95YXa+j2VfbE4lJAUOk0cJ6HTNnVn6fpuBKyihgiDTD5F7HrbfGe5+uVSEpooAh0gpxZ0LpWhWSIgoYIq2Sz1feZ/t2bxhLSXBJAQUMkVaJMwU3qH6rJLikgAKGSKtUOwVXSXBpMQUM
kVaanAyvVFpESXBpIQUMkTSplORWElxaSAFDJE3K5TW6u73XRVpEAUMkbWbMOPLYzLvP5718hwoTSgupWq1IWgwNeTOhhoePbJsxQ4FCUkM9DJG06O+fGCxg4syooaGJ18/Q9TSkydTDEEmLqBlQ27dP7X0Ui1Ovp9HX5z1Wb0QaRD0MkbSImgE1b15472OyVq3TmNzzUU+nbSlgiKTFJZdEb4+7/qLZ6zSCnk+xqLLsRwEFDJG0uOee6O1x1180e51GpbyLtBUFDJG0KJfDiFN3qtw6jUYNG5VrczU0rJUJChgiaVEuhxFWd2rFionPo6bfNnLYqFyb49KwVmaYC6phptzChQvdxo0bW90MkcYJW4fR3V3/OozeXu9HeLJ83qtfVY8k2tzI9h3lzOwR59zCpI6nHoZIWoT1IpJYtJfUsFGYJNrcyPZJotTDEGl3af8LPu3tyzD1MESkOmEJ87AEeasSz3HbJy2ngCHS7uIMG7Uy8VzaPoBc7sjUXCW+U0VDUiKSjmGhRiX9j2IakhKR5KUh8axFgKmngCHSbmrJRSSxnqJeaQhaUlZdAcPM/tzMnjSzMTNbWLK918wOmNkv/NstJa+da2ZPmNkWM/uyWXCFGBGpW625iDQkntMQtKSsensYm4DLgZ+EvPYb59xZ/u2aku03A33AGf7t4jrbICKBWod1GrUGpBppCFpSVl3Xw3DObQaI20kws5OB451zD/nPvwW8D/hhPe0QEV89wzqFQmuTy8Fn9/d77Z03zwsWSninRiNzGKeZ2aNm9oCZvc3fdgqwo2SfHf42EUlC1oZ1JudbwJuVNTbm3StYpErFgGFm681sU8htaZm3PQ/Mc86dDXwa+LaZHQ+EdUUi5/WaWZ+ZbTSzjbt27arUVBHJ0rCOig5mTsWA4Zy70Dm3IOS2tsx7Djrn9viPHwF+A7wer0cxt2TXucBzZY4z6Jxb6JxbOGfOnLjnJHL0SkMuIi5No82chgxJmdkcM8v5j0/HS25vdc49D7xsZm/xZ0ddBUQGHhGpQaGQzLDO5OGilSuTLR2iabSZU++02svMbAdwHnC3ma3zX7oAeNzMHgO+C1zjnNvrv7YC+DqwBa/noYS3SNqEDRfdfHOyw0dZy7eISoOISIioUiGT1VM6RKVAGk6lQUSk8eIOC9UzfJSlfIsAda7DEJE2NW9evB5GvcNHrV77IVVRD0NEpgqbnjtZWqfrSsMoYIjIVGHDRStWVB4+atVFmKQpNCQlIuGqHS6anMQOZlIFx5LMUw9DRJKhhXhtTwFDRJLR7IV4Gv5qOgUMEUlGMxfiqQ5VSyhgiEgymln4UMNfLaGAISLJaOZCPNWhagnNkhKR5DRrIV7UwkLVoWoo9TBEJFnNSEZn6bofbUQBQ0SS06xktOpQtYSq1YpIcqKq3NZT1VZqpmq1IpJeSka3NQUMEUmOLorU1hQwRCQ5tSSjtWI7MxQwRCQ51SajtWI7U5T0FpHWUZK8oZT0FpH2oSR5pihgiEjrKEmeKQoYItI6WrGdKQoYItI6WrGdKSo+KCKt1ayChVI39TBEJH20NiOV1MMQkXQJ1mYEF0gK1maAeiItph6GiKSLrqaXWgoYIpIuWpuRWgoYIpIuWpuRWgoYIpIuWpuRWgoYIpIutazN0KyqptAsKRFJn2rWZmhWVdOohyEi2aZZVU2jgCEi2aZZVU2jgCEi2aZZVU2jgCEi2aZZVU2jgCEi2aaKt02jWVIikn2qeNsUdfUwzOx/m9lTZva4mf2TmZ1Q8toNZrbFzJ42s3eVbL/Y37bFzK6v5/NFRKR56h2S+hGwwDn3JuBXwA0AZjYfuAL4E+BiYJWZ5cwsB3wVeDcwH7jS31dERFKuroDhnPs359xh/+nPgLn+46XAHc65g865Z4AtwCL/tsU5t9U5NwLc4e8rIiIpl2TS+2rgh/7jU4BnS17b4W+L2i4i0lgqH1K3iklvM1sPvDrkpX7n3Fp/n37gMBB8Axay
vyM8QLkyn90H+Gv8OWhmmyq1N6NmA7tb3YgG0vllW+bPbzbMmgd5C36DikXcsmVj25ctK+72tmX6/Mp4Q5IHqxgwnHMXlnvdzJYD7wGWOOeCH/8dwKklu80FnvMfR20P++xBYND/nI3OuYWV2ptF7XxuoPPLOp1fdpnZxiSPV+8sqYuBzwDvdc6VFnO5C7jCzKab2WnAGcAG4OfAGWZ2mpl14SXG76qnDSIi0hz1rsP4CjAd+JGZAfzMOXeNc+5JM1sD/BJvqOrjzrlRADO7FlgH5IBvOOeerLMNIiLSBHUFDOfc68q8NgBMWZvvnLsHuKeGjxus4T1Z0c7nBjq/rNP5ZVei52ZH0g4iIiLRVEtKRERiSV3AONrKjWS57QBmdqqZ3Wdmm83sSTP7hL99lpn9yMx+7d+f6G83M/uyf76Pm9k5rT2DePxKBY+a2Q/856eZ2cP++d3pT+LAn+hxp39+D5tZbyvbHYeZnWBm3/X/u9tsZue10/dnZp/y/21uMrPvmNkxWf7+zOwbZvb70mUGtXxfZrbc3//X/mzXypxzqboBFwGd/uPPAZ/zH88HHsNLsp8G/AYvcZ7zH58OdPn7zG/1ecQ818y2veQcTgbO8R8fh1ciZj7wd8D1/vbrS77HS/AWeBrwFuDhVp9DzPP8NPBt4Af+8zXAFf7jW4AV/uOVwC3+4yuAO1vd9hjndhvwEf9xF3BCu3x/eAuDnwFmlHxvH8ry9wdcAJwDbCrZVtX3BcwCtvr3J/qPT6z42a0++Qr/x1wGDPmPbwBuKHltHXCef1tXsn3Cfmm+ZbntZc5pLfBO4GngZH/bycDT/uNbgStL9h/fL603vPVC9wL/CfiB/x/fbo78YTP+PQb/Lv3Hnf5+1upzKHNux/s/qDZpe1t8fxypLjHL/z5+ALwr698f0DspYFT1fQFXAreWbJ+wX9QtdUNSk7R7uZEst30Kv/t+NvAwcJJz7nkA//6P/N2yeM5fAv4bMOY/7wFecEfqqJWew/j5+a+/6O+fVqcDu4B/9Ifcvm5mM2mT788591vg88B24Hm87+MR2uf7C1T7fdX0PbYkYJjZen88cfJtack+ccuNRG3Pgiy3fQIzOxb4HvBJ59xL5XYN2Zbaczaz9wCKpl+QAAACDklEQVS/d849Uro5ZFcX47U06sQb3rjZOXc2sB9vSCNKps7PH8tfijeM/RpgJl617Mmy+v1VkujvZksuoORaWG4kZcqdU2aY2TS8YDHknPu+v3mnmZ3snHvezE4Gfu9vz9o5vxV4r5ldAhyDN4TzJeAEM+v0/wotPYfg/HaYWSfwKmBv85sd2w5gh3PuYf/5d/ECRrt8fxcCzzjndgGY2feB/0j7fH+Bar+vHcDiSdvvr/QhqRuSsqOr3EiW2w54szCAfwA2O+e+WPLSXUAw82I5Xm4j2H6VP3vjLcCLQVc6jZxzNzjn5jrnevG+nx875wrAfcCf+btNPr/gvP/M3z+1f6E6534HPGtmQZG6JXgVGtri+8MbinqLmXX7/1aD82uL769Etd/XOuAiMzvR74Vd5G8rr9XJm5Bkzha8sbVf+LdbSl7rx5tV9DTw7pLtl+DNzvkNXhXdlp9HFeeb2bb77T8fryv7eMl3dgneuO+9wK/9+1n+/oZ3Ea3fAE8AC1t9DlWc62KOzJI6He8Pli3A/wGm+9uP8Z9v8V8/vdXtjnFeZwEb/e/wn/FmzbTN9wd8FngK2ATcjjfTMrPfH/AdvHzMIbyewodr+b7wcsRb/NtfxvlsrfQWEZFYUjckJSIi6aSAISIisShgiIhILAoYIiISiwKGiIjEooAhIiKxKGCIiEgsChgiIhLL/wdHuZ1vOQluowAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1c789391d68>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot the first 50 points of every cluster; at 50 per cluster some points look less well clustered.\n",
    "for i in range(n_clusters):\n",
    "    # np.nonzero returns the indices of the rows whose label equals i\n",
    "    index = np.nonzero(train_pca_pred==i)[0]\n",
    "    x1 = train_pca[index,0]\n",
    "    x2 = train_pca[index,1]\n",
    "    # Draw only the first 50 members of each cluster, in one call.\n",
    "    plt.scatter(x1[:50],x2[:50],color=colors[i])\n",
    "\n",
    "# Fixed viewport: points outside these limits are not visible in the figure.\n",
    "plt.axis([-200,1000,-200,30])\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
